This R script merges the RS (remote sensing) data downloaded from the Google Drive folder shared by Bea with the ReSurvey database.
library(tidyverse)
library(here)
# Root directory of the S2 exports
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/S2"
# Collect (recursively, with full paths) every CSV whose file name
# contains "max_Filtered"
csv_files <- list.files(
  path = folder_path,
  pattern = "max_Filtered.*\\.csv$",
  recursive = TRUE,
  full.names = TRUE
)
# Read one S2 "max_Filtered" CSV and label its rows with information taken
# from the file name.
#
# The file name is first matched against
#   <digits/underscores><biogeo>_<unit>_Sentinel..._<suffix>.csv
# and, when that fails (e.g. no <unit> part), against
#   <digits/underscores><biogeo>_Sentinel...
#
# Argument:
#   file_path  path to a single CSV file
# Value:
#   the tibble returned by read_csv() plus two character columns, `biogeo`
#   and `unit`; either is NA when it cannot be extracted. A warning is raised
#   when `biogeo` cannot be extracted at all.
read_and_label <- function(file_path) {
  file_name <- basename(file_path)

  # str_match() returns a one-row matrix: column 1 is the full match and the
  # following columns are the capture groups (all NA when the pattern fails)
  components <- str_match(
    file_name,
    "^[0-9_]*([^_]+)_([^_]+)_Sentinel.*?_(.*?).csv"
  )
  biogeo <- components[1, 2]
  # Used as-is: it is already NA_character_ when the unit is missing, so the
  # `unit` column keeps a character type for every file
  unit <- components[1, 3]

  # Fallback for file names without a unit: take the part right before
  # "_Sentinel" as biogeo
  if (is.na(biogeo) && grepl("Sentinel", file_name)) {
    biogeo <- str_match(file_name, "^[0-9_]*([^_]+)_Sentinel")[1, 2]
  }

  if (is.na(biogeo)) {
    warning("Failed to extract biogeo for file: ", file_name, call. = FALSE)
  }

  # show_col_types = FALSE: avoid printing the column specification once per
  # file when this function is mapped over many files
  read_csv(file_path, show_col_types = FALSE) %>%
    mutate(biogeo = biogeo, unit = unit)
}
# Apply read_and_label() to every listed file and stack the resulting
# tibbles by row
data_RS_S2 <- csv_files %>%
  map(read_and_label) %>%
  bind_rows()
Rows: 2442 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 268 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 64 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 689 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 3919 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 2639 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 3206 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 3187 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 3854 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 2440 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 388 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 2784 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 1 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 339 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 980 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 2948 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 175 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 1070 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 2345 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 247 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 2684 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 4293 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 4591 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 4479 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 4636 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 4318 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 4612 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 2569 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 358 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 762 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 15 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 31 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 2979 Columns: 10── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): system:index, source, .geo
dbl (7): Lat_update, Lon_update, NDMI, NDVI, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the merged S2 table
print(data_RS_S2)
# Number of rows per biogeo / unit combination (print up to 100 groups)
data_RS_S2 %>%
  count(biogeo, unit) %>%
  print(n = 100)
# Keep only the columns we need. Lat, Lon and year are renamed with an "_RS"
# suffix while selecting: they are only kept in case they differ from those
# in the ReSurvey database due to updates based on Ilona's info.
data_RS_S2 <- data_RS_S2 %>%
  select(
    obs_unique, biogeo, unit,
    year_RS = year,
    source,
    Lat_RS = Lat_update, Lon_RS = Lon_update,
    NDVI, NDMI
  )
# Root directory of the S2 exports (same folder as for the max_Filtered files)
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/S2"
# Collect (recursively, with full paths) every CSV whose file name
# contains "NDVI_Phenology"
csv_files <- list.files(
  path = folder_path,
  pattern = "NDVI_Phenology.*\\.csv$",
  recursive = TRUE,
  full.names = TRUE
)
# Read one S2 "NDVI_Phenology" CSV and label its rows with information taken
# from the file name.
#
# The file name is first matched against
#   <digits/underscores><biogeo>_<unit>_NDVI_Phenology..._<suffix>.csv
# and, when that fails (e.g. no <unit> part), against
#   <digits/underscores><biogeo>_NDVI_Phenology...
#
# Argument:
#   file_path  path to a single CSV file
# Value:
#   the tibble returned by read_csv() plus two character columns, `biogeo`
#   and `unit`; either is NA when it cannot be extracted. A warning is raised
#   when `biogeo` cannot be extracted at all.
read_and_label <- function(file_path) {
  file_name <- basename(file_path)

  # str_match() returns a one-row matrix: column 1 is the full match and the
  # following columns are the capture groups (all NA when the pattern fails)
  components <- str_match(
    file_name,
    "^[0-9_]*([^_]+)_([^_]+)_NDVI_Phenology.*?_(.*?).csv"
  )
  biogeo <- components[1, 2]
  # Used as-is: it is already NA_character_ when the unit is missing, so the
  # `unit` column keeps a character type for every file
  unit <- components[1, 3]

  # Fallback for file names without a unit: take the part right before
  # "_NDVI_Phenology" as biogeo
  if (is.na(biogeo) && grepl("_NDVI_Phenology", file_name)) {
    biogeo <- str_match(file_name, "^[0-9_]*([^_]+)_NDVI_Phenology")[1, 2]
  }

  if (is.na(biogeo)) {
    warning("Failed to extract biogeo for file: ", file_name, call. = FALSE)
  }

  # show_col_types = FALSE: avoid printing the column specification once per
  # file when this function is mapped over many files
  read_csv(file_path, show_col_types = FALSE) %>%
    mutate(biogeo = biogeo, unit = unit)
}
# Apply read_and_label() to every listed file and stack the resulting
# tibbles by row
data_RS_S2_phen <- csv_files %>%
  map(read_and_label) %>%
  bind_rows()
Rows: 2442 Columns: 14── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): system:index, .geo
dbl (9): EOS_DOY, Lat_update, Lon_update, Peak_DOY, SOS_DOY, obs_unique, plot_uniqu, t1, year
date (3): EOS_Date, Peak_Date, SOS_Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 268 Columns: 14── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): system:index, .geo
dbl (9): EOS_DOY, Lat_update, Lon_update, Peak_DOY, SOS_DOY, obs_unique, plot_uniqu, t1, year
date (3): EOS_Date, Peak_Date, SOS_Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 689 Columns: 14── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): system:index, .geo
dbl (9): EOS_DOY, Lat_update, Lon_update, Peak_DOY, SOS_DOY, obs_unique, plot_uniqu, t1, year
date (3): EOS_Date, Peak_Date, SOS_Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 980 Columns: 14── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): system:index, .geo
dbl (9): EOS_DOY, Lat_update, Lon_update, Peak_DOY, SOS_DOY, obs_unique, plot_uniqu, t1, year
date (3): EOS_Date, Peak_Date, SOS_Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 1 Columns: 14── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): system:index, .geo
dbl (9): EOS_DOY, Lat_update, Lon_update, Peak_DOY, SOS_DOY, obs_unique, plot_uniqu, t1, year
date (3): EOS_Date, Peak_Date, SOS_Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Rows: 339 Columns: 14── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): system:index, .geo
dbl (9): EOS_DOY, Lat_update, Lon_update, Peak_DOY, SOS_DOY, obs_unique, plot_uniqu, t1, year
date (3): EOS_Date, Peak_Date, SOS_Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the merged phenology table
print(data_RS_S2_phen)
# Number of rows per biogeo / unit combination (print up to 100 groups)
data_RS_S2_phen %>%
  count(biogeo, unit) %>%
  print(n = 100)
# Keep only the identifier columns and the phenology metrics.
# Lat, Lon and year are dropped here: in case they differ from those in the
# ReSurvey database due to updates based on Ilona's info, we already have
# Lat_RS, Lon_RS and year_RS from data_RS_S2.
data_RS_S2_phen <- select(
  data_RS_S2_phen,
  obs_unique, biogeo, unit,
  SOS_DOY, SOS_Date, Peak_DOY, Peak_Date, EOS_DOY, EOS_Date
)
# Root directory of the Landsat exports
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/Landsat"
# Collect (recursively, with full paths) every CSV whose file name
# contains "Plot"
csv_files <- list.files(
  path = folder_path,
  pattern = "Plot.*\\.csv$",
  recursive = TRUE,
  full.names = TRUE
)
# Leave out ALP_BAL for now because there seems to be an error in that table.
# NOTE(review): the file is dropped by position (first element of the listing),
# which presumably relies on ALP_BAL sorting first -- confirm whenever files
# are added to or removed from the folder.
csv_files <- tail(csv_files, n = -1L)
# Expected column layout of the Landsat exports: identifier columns first,
# then, for each spectral index, the six "_<stat>" summaries followed by
# "<index>mean", and finally ".geo"
expected_columns <- c(
  "system:index", "Lat_update", "Lon_update", "obs_unique", "plot_uniqu",
  "source", "year",
  paste0(
    rep(c("EVI", "NDMI", "NDVI", "NDWI", "SAVI"), each = 7L),
    c("_max", "_median", "_min", "_p10", "_p90", "_stdDev", "mean")
  ),
  ".geo"
)
# Column types used when parsing the Landsat exports, listed in the same
# order as expected_columns
column_types <- do.call(
  cols,
  c(
    list(
      `system:index` = col_character(),
      Lat_update = col_double(),
      Lon_update = col_double(),
      obs_unique = col_double(),
      plot_uniqu = col_character(),
      source = col_character(),
      year = col_integer()
    ),
    # Every spectral-index summary column is parsed as double
    lapply(
      setNames(
        nm = grep("^(EVI|NDMI|NDVI|NDWI|SAVI)", expected_columns, value = TRUE)
      ),
      function(column) col_double()
    ),
    list(.geo = col_character())
  )
)
# Read one Landsat "Plot" CSV and label its rows with information taken from
# the file name.
#
# The file name is first matched against
#   <digits/underscores><biogeo>_<unit>_Landsat..._<suffix>.csv
# and, when that fails (e.g. no <unit> part), against
#   <digits/underscores><biogeo>_Landsat...
#
# Relies on two objects defined at top level in this script: `column_types`
# (readr column specification) and `expected_columns` (column order).
#
# Argument:
#   file_path  path to a single CSV file (comma- or semicolon-delimited)
# Value:
#   a tibble with the columns named in `expected_columns` first, followed by
#   any remaining columns, including the character columns `biogeo` and
#   `unit` (NA when they cannot be extracted). A warning is raised when
#   `biogeo` cannot be extracted at all.
read_and_label <- function(file_path) {
  file_name <- basename(file_path)

  # str_match() returns a one-row matrix: column 1 is the full match and the
  # following columns are the capture groups (all NA when the pattern fails)
  components <- str_match(
    file_name,
    "^[0-9_]*([^_]+)_([^_]+)_Landsat.*?_(.*?).csv"
  )
  biogeo <- components[1, 2]
  # Used as-is: it is already NA_character_ when the unit is missing, so the
  # `unit` column keeps a character type for every file
  unit <- components[1, 3]

  # Fallback for file names without a unit: take the part right before
  # "_Landsat" as biogeo
  if (is.na(biogeo) && grepl("Landsat", file_name)) {
    biogeo <- str_match(file_name, "^[0-9_]*([^_]+)_Landsat")[1, 2]
  }

  if (is.na(biogeo)) {
    warning("Failed to extract biogeo for file: ", file_name, call. = FALSE)
  }

  # Detect the delimiter from the first line: ";" if one is present there,
  # "," otherwise. any() keeps this a length-one value even when the file is
  # empty and readLines() returns character(0).
  first_line <- readLines(file_path, n = 1)
  delimiter <- if (any(grepl(";", first_line, fixed = TRUE))) ";" else ","

  read_delim(file_path, delim = delimiter, col_types = column_types) %>%
    mutate(biogeo = biogeo, unit = unit) %>%
    # Put the expected columns first, in their expected order
    select(all_of(expected_columns), everything())
}
# Apply read_and_label() to every listed file and stack the resulting
# tibbles by row
data_RS_Landsat <- csv_files %>%
  map(read_and_label) %>%
  bind_rows()
# Inspect the merged Landsat table
print(data_RS_Landsat)
# Number of rows per biogeo / unit combination (print up to 100 groups)
data_RS_Landsat %>%
  count(biogeo, unit) %>%
  print(n = 100)
# Keep only the columns we need, renaming while selecting:
# - NDVI_max / NDMI_max (the only two indices kept so far) become NDVI / NDMI
#   to agree with the S2 data;
# - Lat, Lon and year get an "_RS" suffix; they are only kept in case they
#   differ from those in the ReSurvey database due to updates based on
#   Ilona's info.
data_RS_Landsat <- data_RS_Landsat %>%
  select(
    obs_unique, biogeo, unit,
    year_RS = year,
    source,
    Lat_RS = Lat_update, Lon_RS = Lon_update,
    NDVI = NDVI_max, NDMI = NDMI_max
  )
# Canopy height data: a single CSV for all points
data_RS_CH <- read_csv(
  file = "C:/Data/MOTIVATE/MOTIVATE_RS_data/Canopy_Height_1m/Europe_points_CanopyHeight_1m.csv"
)
Rows: 425310 Columns: 8── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): system:index, .geo
dbl (6): Lat_update, Lon_update, canopy_height, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the table as read
data_RS_CH
# Keep only the observation identifier and the canopy height value
data_RS_CH <- select(data_RS_CH, obs_unique, canopy_height)
This file contains the correspondence between obs_unique and PlotObservationID.
# Lookup table located in the sibling project folder "DB_first_check"
db_Europa <- here("..", "DB_first_check", "data", "clean", "db_Europa_20250107.csv") %>%
  read_csv()
Rows: 425310 Columns: 12── Column specification ────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Country, RS_CODE, ReSurvey site, ReSurvey plot, Expert System, Location method
dbl (6): PlotObservationID, Lon_updated, Lat_updated, plot_unique_id, year, obs_unique_id
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Keep only the columns PlotObservationID (original unique identifier) and obs_unique_id (unique identifier created by me); year is not retained.
# Reduce the lookup table to the two identifier columns
db_Europa <- db_Europa %>% select(PlotObservationID, obs_unique_id)
# Add PlotObservationID to the S2 data. right_join() keeps every S2 row, with
# NA in PlotObservationID when the identifier is not found in db_Europa.
data_RS_S2_ID <- db_Europa %>%
  right_join(
    # Rename to be able to join on this column
    data_RS_S2 %>% rename(obs_unique_id = obs_unique),
    # Explicit key: the join no longer depends on which column names the two
    # tables happen to share
    by = join_by(obs_unique_id)
  )
Joining with `by = join_by(obs_unique_id)`
Now we have PlotObservationID in data_RS_S2_ID.
# Add PlotObservationID to the S2 phenology data. right_join() keeps every
# phenology row, with NA in PlotObservationID when the identifier is not
# found in db_Europa.
data_RS_S2_phen_ID <- db_Europa %>%
  right_join(
    # Rename to be able to join on this column
    data_RS_S2_phen %>% rename(obs_unique_id = obs_unique),
    # Explicit key: the join no longer depends on which column names the two
    # tables happen to share
    by = join_by(obs_unique_id)
  )
Joining with `by = join_by(obs_unique_id)`
Now we have PlotObservationID in data_RS_S2_phen_ID
# Add PlotObservationID to the Landsat data. right_join() keeps every Landsat
# row, with NA in PlotObservationID when the identifier is not found in
# db_Europa.
data_RS_Landsat_ID <- db_Europa %>%
  right_join(
    # Rename to be able to join on this column
    data_RS_Landsat %>% rename(obs_unique_id = obs_unique),
    # Explicit key: the join no longer depends on which column names the two
    # tables happen to share
    by = join_by(obs_unique_id)
  )
Joining with `by = join_by(obs_unique_id)`
Now we have PlotObservationID in data_RS_Landsat_ID.
# Add PlotObservationID to the canopy height data. right_join() keeps every
# canopy height row, with NA in PlotObservationID when the identifier is not
# found in db_Europa.
data_RS_CH_ID <- db_Europa %>%
  right_join(
    # Rename to be able to join on this column
    data_RS_CH %>% rename(obs_unique_id = obs_unique),
    # Explicit key: the join no longer depends on which column names the two
    # tables happen to share
    by = join_by(obs_unique_id)
  )
Joining with `by = join_by(obs_unique_id)`
Now we have PlotObservationID in data_RS_CH_ID.
This is the ReSurvey database after updates (to be continued).
# Read the updated ReSurvey database with read_tsv(). All EUNIS columns are
# forced to character; every other column keeps readr's type guessing.
db_resurv <- read_tsv(
  here("..", "DB_first_check", "data", "clean", "db_resurv_updated_clean.csv"),
  col_types = do.call(
    cols,
    c(
      # Default guessing for columns not named below
      list(.default = col_guess()),
      # One col_character() per EUNIS column; the names are generated for
      # the four assignments a-d:
      #   EUNIS<x>, EUNIS<x>_1..4, EUNIS<x>_1_descr, EUNIS_assignation,
      #   EUNIS<x>_2..4_descr
      lapply(
        setNames(nm = c(
          paste0("EUNIS", c("a", "b", "c", "d")),
          paste0("EUNIS", rep(c("a", "b", "c", "d"), each = 4L), "_", 1:4),
          paste0("EUNIS", c("a", "b", "c", "d"), "_1_descr"),
          "EUNIS_assignation",
          paste0(
            "EUNIS", rep(c("a", "b", "c", "d"), each = 3L), "_", 2:4, "_descr"
          )
        )),
        function(column) col_character()
      )
    )
  )
)
No parsing issues!
For some points, there is data (NDVI and NDMI so far) both from S2 and Landsat. In those cases, use the S2 data because it is more precise (10 m vs 30 m).
# Drop `source` and tag the S2 indices with their satellite so that they do
# not clash with the Landsat columns of the same name
data_RS_S2_ID <- data_RS_S2_ID %>%
  select(-source) %>%
  rename(NDVI_S2 = NDVI, NDMI_S2 = NDMI)
# Same for the Landsat indices
data_RS_Landsat_ID <- data_RS_Landsat_ID %>%
  select(-source) %>%
  rename(NDVI_Landsat = NDVI, NDMI_Landsat = NDMI)
Join S2, S2_phen and Landsat data:
# Combine S2, S2 phenology and Landsat data, keeping rows present in any of
# the three tables. The join keys are spelled out so that the result does not
# depend on which column names the tables happen to share.
data_RS <- data_RS_S2_ID %>%
  full_join(
    data_RS_S2_phen_ID,
    by = join_by(PlotObservationID, obs_unique_id, biogeo, unit)
  ) %>%
  # NOTE(review): year_RS, Lat_RS and Lon_RS are part of the key here, so an
  # observation whose coordinates or year differ between the S2 and Landsat
  # exports ends up on two separate rows -- confirm this is intended.
  full_join(
    data_RS_Landsat_ID,
    by = join_by(
      PlotObservationID, obs_unique_id, biogeo, unit,
      year_RS, Lat_RS, Lon_RS
    )
  )
Joining with `by = join_by(PlotObservationID, obs_unique_id, biogeo, unit)`Joining with `by = join_by(PlotObservationID, obs_unique_id, biogeo, unit, year_RS, Lat_RS, Lon_RS)`
Number of observations with NDVI data from both S2 and Landsat:
# Count the rows that have an NDVI value from both satellites
data_RS %>%
  filter(!is.na(NDVI_S2), !is.na(NDVI_Landsat)) %>%
  nrow()
[1] 493
Send points in .csv to Bea.
# Export the rows that have an NDVI value from both satellites
write.csv(
  x = data_RS %>% filter(!is.na(NDVI_S2), !is.na(NDVI_Landsat)),
  file = here("data", "clean", "points_NDVI_S2_Landsat.csv")
)
Difference between NDVI values from S2 and Landsat:
# Histogram of NDVI differences (S2 minus Landsat) for rows with both values
data_RS %>%
  filter(!is.na(NDVI_S2), !is.na(NDVI_Landsat)) %>%
  mutate(diff_NDVI = NDVI_S2 - NDVI_Landsat) %>%
  ggplot(mapping = aes(x = diff_NDVI)) +
  geom_histogram(fill = "white", color = "black")
# Histogram of NDMI differences (S2 minus Landsat) for rows with both values
data_RS %>%
  filter(!is.na(NDMI_S2), !is.na(NDMI_Landsat)) %>%
  mutate(diff_NDMI = NDMI_S2 - NDMI_Landsat) %>%
  ggplot(mapping = aes(x = diff_NDMI)) +
  geom_histogram(fill = "white", color = "black")
There is a large difference between NDVI values from S2 and Landsat. So far, use the S2 data, but check with Bea.
When NDVI and NDMI values are available from both satellites, use S2:
# Final NDVI / NDMI per row: coalesce() returns the first non-missing value,
# i.e. the S2 value when available, otherwise the Landsat value, and NA when
# both are missing
data_RS <- data_RS %>%
  mutate(
    NDVI = coalesce(NDVI_S2, NDVI_Landsat),
    NDMI = coalesce(NDMI_S2, NDMI_Landsat)
  )
# Attach the RS data and the canopy height to the ReSurvey database, matching
# on PlotObservationID (key spelled out so that the joins do not depend on
# which column names the tables happen to share).
# NOTE(review): left_join() repeats a db_resurv row once per matching row, so
# any PlotObservationID appearing more than once in data_RS or data_RS_CH_ID
# would duplicate observations -- confirm the identifiers are unique there.
db_resurv_RS <- db_resurv %>%
  left_join(
    data_RS %>% select(-obs_unique_id),
    by = join_by(PlotObservationID)
  ) %>%
  left_join(
    data_RS_CH_ID %>% select(-obs_unique_id),
    by = join_by(PlotObservationID)
  ) %>%
  # Flags telling which data sources are available for each observation;
  # the satellite flags require both NDVI and NDMI to be present
  mutate(
    S2_data = !is.na(NDVI_S2) & !is.na(NDMI_S2),
    Landsat_data = !is.na(NDVI_Landsat) & !is.na(NDMI_Landsat),
    CH_data = !is.na(canopy_height)
  )
Joining with `by = join_by(PlotObservationID)`Joining with `by = join_by(PlotObservationID)`
# Number of observations with / without each data source
count(db_resurv_RS, S2_data)
count(db_resurv_RS, Landsat_data)
count(db_resurv_RS, CH_data)
Save clean file for analyses (to be updated continuously due to updates in ReSurvey database and updates on RS data).
# Written with write_tsv(), i.e. tab-separated, although the file name ends
# in ".csv"
write_tsv(
  x = db_resurv_RS,
  file = here("data", "clean", "db_resurv_RS_20250327.csv")
)
# Record the R version and package versions used for this run
sessionInfo()
R version 4.4.2 (2024-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26100)
Matrix products: default
locale:
[1] LC_COLLATE=Spanish_Spain.utf8 LC_CTYPE=Spanish_Spain.utf8 LC_MONETARY=Spanish_Spain.utf8
[4] LC_NUMERIC=C LC_TIME=Spanish_Spain.utf8
time zone: Europe/Madrid
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] randomForestExplainer_0.10.1 pROC_1.18.5 caret_7.0-1
[4] lattice_0.22-6 randomForest_4.7-1.2 rnaturalearth_1.0.1
[7] sf_1.0-19 scales_1.3.0 readxl_1.4.3
[10] gridExtra_2.3 here_1.0.1 lubridate_1.9.4
[13] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[16] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
[19] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] DBI_1.2.3 rlang_1.1.5 magrittr_2.0.3 e1071_1.7-16
[5] compiler_4.4.2 reshape2_1.4.4 systemfonts_1.2.1 vctrs_0.6.5
[9] pkgconfig_2.0.3 crayon_1.5.3 fastmap_1.2.0 labeling_0.4.3
[13] utf8_1.2.4 rmarkdown_2.29 prodlim_2024.06.25 tzdb_0.4.0
[17] ragg_1.3.3 bit_4.5.0.1 xfun_0.50 cachem_1.1.0
[21] jsonlite_1.8.9 recipes_1.1.1 terra_1.8-15 parallel_4.4.2
[25] R6_2.5.1 RColorBrewer_1.1-3 bslib_0.9.0 stringi_1.8.4
[29] GGally_2.2.1 parallelly_1.42.0 pkgload_1.4.0 rpart_4.1.23
[33] jquerylib_0.1.4 cellranger_1.1.0 Rcpp_1.0.14 iterators_1.0.14
[37] knitr_1.49 future.apply_1.11.3 Matrix_1.7-1 splines_4.4.2
[41] nnet_7.3-19 timechange_0.3.0 tidyselect_1.2.1 rstudioapi_0.17.1
[45] yaml_2.3.10 timeDate_4041.110 codetools_0.2-20 listenv_0.9.1
[49] plyr_1.8.9 withr_3.0.2 evaluate_1.0.3 future_1.34.0
[53] survival_3.7-0 ggstats_0.8.0 rnaturalearthdata_1.0.0 units_0.8-5
[57] proxy_0.4-27 pillar_1.10.1 KernSmooth_2.23-24 DT_0.33
[61] stats4_4.4.2 foreach_1.5.2 generics_0.1.3 vroom_1.6.5
[65] rprojroot_2.0.4 hms_1.1.3 munsell_0.5.1 globals_0.16.3
[69] class_7.3-22 glue_1.8.0 tools_4.4.2 data.table_1.16.4
[73] ModelMetrics_1.2.2.2 gower_1.0.2 grid_4.4.2 ipred_0.9-15
[77] colorspace_2.1-1 nlme_3.1-166 cli_3.6.3 textshaping_1.0.0
[81] viridisLite_0.4.2 lava_1.8.1 gtable_0.3.6 sass_0.4.9
[85] digest_0.6.37 classInt_0.4-11 ggrepel_0.9.6 htmlwidgets_1.6.4
[89] farver_2.1.2 htmltools_0.5.8.1 lifecycle_1.0.4 hardhat_1.4.1
[93] httr_1.4.7 bit64_4.6.0-1 MASS_7.3-61